from pyspark.sql import SparkSession

if __name__ == '__main__':
    # Demo script: load a headerless CSV of student scores, give its columns
    # names, then run the same filter once through Spark SQL and once through
    # the DataFrame DSL.
    spark = SparkSession.builder.appName("test").master("local[*]").getOrCreate()

    try:
        # The path is relative to the current working directory.
        # header=False: the file has no header row, so the columns are named
        # explicitly with toDF() below. No schema is supplied and inferSchema
        # is left at its default, so the columns are read as strings.
        raw_df = spark.read.csv("../data/input/stu_score.txt", sep=',', header=False)
        score_df = raw_df.toDF("id", "name", "score")
        score_df.printSchema()
        score_df.show()

        # SQL style: register the DataFrame as a temp view and query it.
        # NOTE(review): the column labelled "name" is compared against '语文',
        # which looks like a subject rather than a person's name -- confirm
        # the intended column label against the data file.
        score_df.createOrReplaceTempView("score")
        spark.sql("""
    select * from score where name='语文' limit 5
    """).show()

        # DSL style: the same filter and row limit via DataFrame methods.
        score_df.where("name='语文'").limit(5).show()
    finally:
        # Always release the local Spark context, even if a step above fails.
        spark.stop()
